{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# <center> Apply backpropagation algorithm to wheet seeds datasets</center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "import math\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_dir = '/home/lidong/Datasets/ML'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_dataset_from_file(path):\n",
    "    dataset = []\n",
    "    with open(path) as f:\n",
    "        for line in f:\n",
    "            row = line.rstrip('\\n').split()\n",
    "            dataset.append(list(map(lambda x: float(x.strip()), row[:-1])))\n",
    "            dataset[-1].append(int(row[-1].strip()))\n",
    "    # classify by raw values\n",
    "    class_values = set([row[-1] for row in dataset])\n",
    "    class_indexs = dict()\n",
    "    for i, value in enumerate(class_values):\n",
    "        class_indexs[value] = i\n",
    "    for row in dataset:\n",
    "        row[-1] = class_indexs[row[-1]]\n",
    "    return dataset, class_indexs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[13.2, 13.66, 0.8883, 5.236, 3.232, 8.315, 5.056, 2],\n",
       " [11.84, 13.21, 0.8521, 5.175, 2.836, 3.598, 5.044, 2],\n",
       " [12.3, 13.34, 0.8684, 5.243, 2.974, 5.637, 5.063, 2]]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# test\n",
    "dataset, _ = load_dataset_from_file(os.path.join(dataset_dir, 'seeds_dataset.txt'))\n",
    "dataset[-3:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def normalize_dataset(dataset):\n",
    "    minmax = [[min(column), max(column)] for column in zip(*dataset)]\n",
    "    for row in dataset:\n",
    "        for i in range(len(row) - 1):\n",
    "            row[i] = round((row[i] - minmax[i][0]) / (minmax[i][1] - minmax[i][0]), 4)\n",
    "    return dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# test\n",
    "# dataset = normalize_dataset(dataset)\n",
    "# print(dataset[:3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# targets = set([row[-1] for row in dataset])\n",
    "# targets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "class NeuralNetwork(object):\n",
    "    def __init__(self, ninput, nhiden, noutput):\n",
    "        self.ni = ninput\n",
    "        self.nh = nhiden\n",
    "        self.no = noutput\n",
    "        self.network = self.initialize_network(self.ni, self.nh, self.no)\n",
    "        \n",
    "    def train(self, dataset, lr, epochs):\n",
    "        for epoch in range(epochs):\n",
    "            for row in dataset:\n",
    "                expected = [0 for i in range(self.no)]\n",
    "                expected[row[-1]] = 1\n",
    "                inputs = row[:-1]\n",
    "                self.forward_propagate(self.network, inputs, self.activite_transfer)\n",
    "                self.backward_propagate(self.network, expected, self.activite_derivative)\n",
    "                self.update_weights(self.network, inputs, lr)\n",
    "        # print(self.network)\n",
    "        \n",
    "    def predict(self, inputs):\n",
    "        results = list() \n",
    "        for row in inputs:\n",
    "            outputs = self.forward_propagate(self.network, row, self.activite_transfer)\n",
    "            results.append(outputs.index(max(outputs)))\n",
    "        # print(results)\n",
    "        return results\n",
    "    \n",
    "    @staticmethod\n",
    "    def initialize_network(ni, nh, no):\n",
    "        # print(ni, nh, no)\n",
    "        network = list()\n",
    "        # hiden layer\n",
    "        network.append([{'weights': [random.random() for i in range(ni + 1)]} for j in range(nh)])\n",
    "        # output layer\n",
    "        network.append([{'weights': [random.random() for i in range(nh + 1)]} for k in range(no)])\n",
    "        # print(network)\n",
    "        return network\n",
    "    \n",
    "    @staticmethod\n",
    "    def activite_transfer(weights, inputs):\n",
    "        output = weights[-1] # bias\n",
    "        for i in range(len(weights) - 1):\n",
    "            output += weights[i] * inputs[i]\n",
    "        return 1.0 / (1.0 + math.exp(-output)) # activete function sigmiod\n",
    "    \n",
    "    @staticmethod\n",
    "    def activite_derivative(output):\n",
    "        return output * (1 - output)\n",
    "    \n",
    "    @staticmethod\n",
    "    def forward_propagate(network, inputs, activite_transfer_cb):\n",
    "        inputs_ = inputs\n",
    "        for layer in network:\n",
    "            output = []\n",
    "            for neuron in layer:\n",
    "                neuron['output'] = activite_transfer_cb(neuron['weights'], inputs_)\n",
    "                output.append(neuron['output'])\n",
    "            inputs_ = output\n",
    "        # print(inputs_)\n",
    "        return inputs_\n",
    "    \n",
    "    @staticmethod\n",
    "    def backward_propagate(network, expected, activite_derivative_cb):\n",
    "        # last layer: output layer\n",
    "        for k, neuron in enumerate(network[-1]):\n",
    "            # middle signnal: delta\n",
    "            error = -1 * (neuron['output'] - expected[k])\n",
    "            neuron['delta'] = error * activite_derivative_cb(neuron['output'])\n",
    "            \n",
    "        # other layers, full connect layers  \n",
    "        for l in reversed(range(len(network)-1)):\n",
    "            for j, neuron in enumerate(network[l]):\n",
    "                error = 0.0\n",
    "                for nr in network[l+1]:\n",
    "                    error += nr['delta'] * nr['weights'][j]\n",
    "                neuron['delta'] = error * activite_derivative_cb(neuron['output'])    \n",
    "                \n",
    "    @staticmethod\n",
    "    def update_weights(network, inputs, l_rate):\n",
    "        for i in range(len(network)):\n",
    "            if i == 0:\n",
    "                # remove the last column: target classification value\n",
    "                inputs_ = inputs\n",
    "            else:\n",
    "                # pre layer output as inputs\n",
    "                inputs_ = [neuron['output'] for neuron in network[i - 1]]\n",
    "                \n",
    "            for neuron in network[i]:\n",
    "                for i in range(len(inputs_)):\n",
    "                    # weights\n",
    "                    neuron['weights'][i] += l_rate * neuron['delta'] * inputs_[i]\n",
    "                # biases\n",
    "                neuron['weights'][-1] += l_rate * neuron['delta']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "def accuracy_metric(actual, predicted):          \n",
    "    correct = 0                                  \n",
    "    for i in range(len(actual)):\n",
    "        if actual[i] == predicted[i]:\n",
    "            correct += 1\n",
    "    return correct / float(len(actual)) * 100.0\n",
    "    \n",
    "# Evaluate an algorithm using a cross validation split\n",
    "def evaluate_algorithm(dataset, nfolds, ninput, nhiden, noutput, lr, epochs):\n",
    "    random.seed(0)\n",
    "    fold_list = list()\n",
    "    fold_size = int(len(dataset) / nfolds)\n",
    "    indices = [i for i in range(len(dataset))]\n",
    "    # random.shuffle(indices)\n",
    "    for i in range(nfolds):\n",
    "        fold_list.append([dataset[indices.pop()] for j in range(fold_size)])\n",
    "        \n",
    "    scores = list()\n",
    "    # each fold can use as test data\n",
    "    for test_data in fold_list:\n",
    "        train_data = list(fold_list)\n",
    "        train_data.remove(test_data)\n",
    "        train_data = sum(train_data, [])\n",
    "        nn = NeuralNetwork(ninput, nhiden, noutput)\n",
    "        nn.train(train_data, lr, epochs)\n",
    "        predicted = nn.predict(test_data)\n",
    "        actual = [row[-1] for row in test_data]\n",
    "        scores.append(accuracy_metric(actual, predicted))\n",
    "    return scores\n",
    " "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[0.441, 0.5021, 0.5708, 0.4865, 0.4861, 0.1893, 0.3452, 0],\n",
       " [0.4051, 0.4463, 0.6624, 0.3688, 0.5011, 0.0329, 0.2152, 0],\n",
       " [0.3494, 0.3471, 0.8793, 0.2207, 0.5039, 0.2515, 0.1507, 0],\n",
       " [0.3069, 0.3161, 0.7931, 0.2393, 0.5339, 0.1942, 0.1408, 0],\n",
       " [0.5241, 0.5331, 0.8648, 0.4274, 0.6643, 0.0767, 0.323, 0]]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset, class_indices = load_dataset_from_file(os.path.join(dataset_dir, 'seeds_dataset.txt'))\n",
    "dataset = normalize_dataset(dataset)\n",
    "nfolds = 5\n",
    "ninput = len(dataset[0])-1\n",
    "nhiden = 5\n",
    "noutput = len(class_indices)\n",
    "dataset[0:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Score: [83.33333333333334, 88.09523809523809, 92.85714285714286, 90.47619047619048, 85.71428571428571]\n",
      "Mean Score: 88.095%\n"
     ]
    }
   ],
   "source": [
    "scores = evaluate_algorithm(dataset, nfolds, ninput, nhiden, noutput, lr=0.1, epochs=500)\n",
    "print('Score: %s' % scores)\n",
    "print('Mean Score: %.3f%%' % (sum(scores)/float(len(scores))))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
